{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AGE</th>\n",
       "      <th>GENDER</th>\n",
       "      <th>MARRIAGE</th>\n",
       "      <th>EDU_EXPERIENCE</th>\n",
       "      <th>WORK_SIZE</th>\n",
       "      <th>WORK_POWER</th>\n",
       "      <th>IS_ILLEGAL_HIS</th>\n",
       "      <th>CURR_FREEZE_VALUE</th>\n",
       "      <th>GRADUATE_YEAR</th>\n",
       "      <th>OCCUPATION</th>\n",
       "      <th>OCCUPATION_TYPE</th>\n",
       "      <th>VIP_FLAG</th>\n",
       "      <th>GRAY_FLAG</th>\n",
       "      <th>FIVE_CLASS_TYPE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15735</th>\n",
       "      <td>51</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>99</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15741</th>\n",
       "      <td>56</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>60</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15753</th>\n",
       "      <td>45</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>70</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>9</td>\n",
       "      <td>z</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15788</th>\n",
       "      <td>41</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>70</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9</td>\n",
       "      <td>z</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15797</th>\n",
       "      <td>42</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>70</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       AGE  GENDER  MARRIAGE  EDU_EXPERIENCE  WORK_SIZE  WORK_POWER  \\\n",
       "15735   51       1         2              99          2           1   \n",
       "15741   56       1         2              60          2           1   \n",
       "15753   45       1         2              70          2           1   \n",
       "15788   41       1         2              70          2           1   \n",
       "15797   42       1         3              70          3           1   \n",
       "\n",
       "       IS_ILLEGAL_HIS  CURR_FREEZE_VALUE  GRADUATE_YEAR  OCCUPATION  \\\n",
       "15735             2.0                0.0            4.0           9   \n",
       "15741             2.0                0.0            4.0           9   \n",
       "15753             2.0                0.0            3.0           9   \n",
       "15788             2.0                0.0            4.0           9   \n",
       "15797             2.0                0.0            4.0           9   \n",
       "\n",
       "      OCCUPATION_TYPE  VIP_FLAG  GRAY_FLAG  FIVE_CLASS_TYPE  \n",
       "15735               5         0          0                0  \n",
       "15741               5         0          0                0  \n",
       "15753               z         0          0                0  \n",
       "15788               z         0          0                0  \n",
       "15797               5         0          0                1  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file_path = './test2.csv'\n",
    "data = pd.read_csv(file_path,index_col=0)\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((504, 10), (504,))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "numerical = ['AGE', 'WORK_SIZE', 'CURR_FREEZE_VALUE', 'GRADUATE_YEAR']\n",
    "\n",
    "categorical = ['EDU_EXPERIENCE', 'MARRIAGE', 'OCCUPATION', 'OCCUPATION_TYPE']\n",
    "\n",
    "binary = ['GENDER', 'WORK_POWER']\n",
    "\n",
    "train_X = data[numerical + categorical + binary]\n",
    "train_Y = data['FIVE_CLASS_TYPE']\n",
    "\n",
    "train_X.shape,train_Y.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "字段|中文|类型\n",
    "--|--|--\n",
    "AGE|年龄|数值\n",
    "WORK_SIZE|劳动人口数|数值\n",
    "CURR_FREEZE_VALUE|账户冻结金额|数值\n",
    "GRADUATE_YEAR|工作年限|数值\n",
    "EDU_EXPERIENCE|最高学历|类别\n",
    "MARRIAGE|结婚|类别\n",
    "OCCUPATION|职务|类别\n",
    "OCCUPATION_TYPE|职业类型|类别\n",
    "GENDER|性别|二值\n",
    "WORK_POWER|劳动能力|二值\n",
    "IS_ILLEGAL_HIS|是否非法|删除\n",
    "VIP_FLAG|白名单客户|删除\n",
    "GRAY_FLAG|灰名单客户|删除\n",
    "FIVE_CLASS_TYPE|五级分类|目标值\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 类别型变量进行One-hot编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>EDU_EXPERIENCE_10</th>\n",
       "      <th>EDU_EXPERIENCE_20</th>\n",
       "      <th>EDU_EXPERIENCE_30</th>\n",
       "      <th>EDU_EXPERIENCE_40</th>\n",
       "      <th>EDU_EXPERIENCE_50</th>\n",
       "      <th>EDU_EXPERIENCE_60</th>\n",
       "      <th>EDU_EXPERIENCE_70</th>\n",
       "      <th>EDU_EXPERIENCE_80</th>\n",
       "      <th>EDU_EXPERIENCE_90</th>\n",
       "      <th>EDU_EXPERIENCE_99</th>\n",
       "      <th>...</th>\n",
       "      <th>OCCUPATION_4</th>\n",
       "      <th>OCCUPATION_9</th>\n",
       "      <th>OCCUPATION_TYPE_0</th>\n",
       "      <th>OCCUPATION_TYPE_1</th>\n",
       "      <th>OCCUPATION_TYPE_3</th>\n",
       "      <th>OCCUPATION_TYPE_4</th>\n",
       "      <th>OCCUPATION_TYPE_5</th>\n",
       "      <th>OCCUPATION_TYPE_6</th>\n",
       "      <th>OCCUPATION_TYPE_y</th>\n",
       "      <th>OCCUPATION_TYPE_z</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15735</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15741</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15753</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15788</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15797</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       EDU_EXPERIENCE_10  EDU_EXPERIENCE_20  EDU_EXPERIENCE_30  \\\n",
       "15735                  0                  0                  0   \n",
       "15741                  0                  0                  0   \n",
       "15753                  0                  0                  0   \n",
       "15788                  0                  0                  0   \n",
       "15797                  0                  0                  0   \n",
       "\n",
       "       EDU_EXPERIENCE_40  EDU_EXPERIENCE_50  EDU_EXPERIENCE_60  \\\n",
       "15735                  0                  0                  0   \n",
       "15741                  0                  0                  1   \n",
       "15753                  0                  0                  0   \n",
       "15788                  0                  0                  0   \n",
       "15797                  0                  0                  0   \n",
       "\n",
       "       EDU_EXPERIENCE_70  EDU_EXPERIENCE_80  EDU_EXPERIENCE_90  \\\n",
       "15735                  0                  0                  0   \n",
       "15741                  0                  0                  0   \n",
       "15753                  1                  0                  0   \n",
       "15788                  1                  0                  0   \n",
       "15797                  1                  0                  0   \n",
       "\n",
       "       EDU_EXPERIENCE_99        ...          OCCUPATION_4  OCCUPATION_9  \\\n",
       "15735                  1        ...                     0             1   \n",
       "15741                  0        ...                     0             1   \n",
       "15753                  0        ...                     0             1   \n",
       "15788                  0        ...                     0             1   \n",
       "15797                  0        ...                     0             1   \n",
       "\n",
       "       OCCUPATION_TYPE_0  OCCUPATION_TYPE_1  OCCUPATION_TYPE_3  \\\n",
       "15735                  0                  0                  0   \n",
       "15741                  0                  0                  0   \n",
       "15753                  0                  0                  0   \n",
       "15788                  0                  0                  0   \n",
       "15797                  0                  0                  0   \n",
       "\n",
       "       OCCUPATION_TYPE_4  OCCUPATION_TYPE_5  OCCUPATION_TYPE_6  \\\n",
       "15735                  0                  1                  0   \n",
       "15741                  0                  1                  0   \n",
       "15753                  0                  0                  0   \n",
       "15788                  0                  0                  0   \n",
       "15797                  0                  1                  0   \n",
       "\n",
       "       OCCUPATION_TYPE_y  OCCUPATION_TYPE_z  \n",
       "15735                  0                  0  \n",
       "15741                  0                  0  \n",
       "15753                  0                  1  \n",
       "15788                  0                  1  \n",
       "15797                  0                  0  \n",
       "\n",
       "[5 rows x 27 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_dummies = pd.get_dummies(data[categorical],columns=categorical)\n",
    "data_dummies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(504, 34)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_X = pd.concat([data[numerical+binary],data_dummies],axis=1)\n",
    "train = pd.concat([train_X,train_Y],axis=1)\n",
    "train.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 获取卡方值\n",
    "对年龄做探索性的分箱\n",
    "\n",
    "命中率，最理想的样本选择命中率是3：1~5：1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.24404761904761904"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pos_cnt = train_Y.sum()   # 命中率(坏人)\n",
    "all_cnt = train_Y.count() # 所有人\n",
    "expected_ratio = float(pos_cnt)/all_cnt\n",
    "expected_ratio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "col = 'AGE'\n",
    "target = 'FIVE_CLASS_TYPE'\n",
    "df = train[[col,target]]\n",
    "df=df.dropna()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cal_woe(row,good,bad):\n",
    "    '''\n",
    "    计算woe值\n",
    "    '''\n",
    "    yi=0.01 if row['hit']==0 else row['hit']\n",
    "    ni=row['all']-row['hit']\n",
    "    ni=0.02 if ni==0 else ni\n",
    "    return np.log((yi/bad)/(ni/good))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cal_iv(row,good,bad):\n",
    "    '''\n",
    "    计算IV值\n",
    "    '''\n",
    "    yi=0.01 if row['hit']==0 else row['hit']\n",
    "    ni=row['all']-row['hit']\n",
    "    ni=0.02 if ni==0 else ni\n",
    "    return (yi/bad-ni/good)*row['woe']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def merge_shiSquare(df_count,minIndex,mergeIndex,col='AGE'):\n",
    "    df_count[col]= df_count[col].astype(np.str)\n",
    "    col_name=df_count.loc[minIndex,col]+\"~\"+df_count.loc[mergeIndex,col] # 将列名拼接\n",
    "    col_names=col_name.split(\"~\") # 切分成列表\n",
    "    col_names=[float(n) for n in col_names] # 转成数值用来排序\n",
    "    col_names.sort() # 排序\n",
    "    df_count.loc[mergeIndex,col]= str(col_names[0])+\"~\"+str(col_names[-1]); # 把最大值和最小值拼接成表签名\n",
    "    for c in ('count', 'hit', 'all', 'expected_cnt'): \n",
    "        df_count.loc[mergeIndex,c]+=df_count.loc[minIndex,c] # 所有列的值相加\n",
    "    # 卡方值重新计算\n",
    "    df_count.loc[mergeIndex,'chi_sequare']=(df_count.loc[mergeIndex,'hit']-df_count.loc[mergeIndex,'expected_cnt'])**2/df_count.loc[mergeIndex,'expected_cnt']\n",
    "    df_count.drop(index=minIndex,inplace=True) # 删除被合并的值\n",
    "    #df_count=df_count.reset_index()\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def chi_sequare_cal(hit,expected_count):\n",
    "    return (hit-expected_count)**2/expected_count\n",
    "\n",
    "def handerCol(df,col,target,maxInterval=5):\n",
    "    df_count = df[col].value_counts().sort_index().reset_index().rename(columns={\"index\":col,col:\"count\"})\n",
    "    df_count['hit']=df_count.apply(lambda a:train.loc[train[col]==a[col],target].sum(),axis=1)\n",
    "    df_count['all']=df_count.apply(lambda a:train.loc[train[col]==a[col],target].count(),axis=1)\n",
    "    df_count['expected_cnt']=df_count['all']*expected_ratio\n",
    "    df_count['chi_sequare']=df_count.apply(lambda row:chi_sequare_cal(row['hit'],row['expected_cnt']),axis=1)\n",
    "\n",
    "    \n",
    "    while df_count.shape[0]>maxInterval: # 保存5个分箱\n",
    "        min_index = df_count[df_count['chi_sequare']==df_count['chi_sequare'].min()].index.tolist()[0] # 最小值索引\n",
    "        if min_index>0 and min_index<df_count.shape[0] and min_index<df_count.shape[0]-1:\n",
    "            diff_sqr_val = df_count.loc[min_index-1,'chi_sequare']-df_count.loc[min_index+1,'chi_sequare'] # 根据卡方值确定合并上一行还是下一行\n",
    "        if min_index==0 or diff_sqr_val>0 and min_index<df_count.shape[0]-1: # 合并的索引号\n",
    "            merge_index = min_index+1\n",
    "        else :\n",
    "            merge_index = min_index-1\n",
    "        merge_shiSquare(df_count,min_index,merge_index,col) # 合并分箱\n",
    "        df_count.index=range(df_count.shape[0]) # 重置索引\n",
    "    [good,bad]=list(data[target].value_counts())\n",
    "    df_count['woe']=df_count.apply(cal_woe,axis=1,args=(good,bad))\n",
    "    df_count['iv']=df_count.apply(cal_iv,axis=1,args=(good,bad))\n",
    "    return df_count\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AGE</th>\n",
       "      <th>count</th>\n",
       "      <th>hit</th>\n",
       "      <th>all</th>\n",
       "      <th>expected_cnt</th>\n",
       "      <th>chi_sequare</th>\n",
       "      <th>woe</th>\n",
       "      <th>iv</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>21.0~22.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.488095</td>\n",
       "      <td>0.536876</td>\n",
       "      <td>1.130615</td>\n",
       "      <td>0.006224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>23.0~27.0</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>1.952381</td>\n",
       "      <td>0.464576</td>\n",
       "      <td>-0.815295</td>\n",
       "      <td>0.008351</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>28.0~57.0</td>\n",
       "      <td>467</td>\n",
       "      <td>110</td>\n",
       "      <td>467</td>\n",
       "      <td>113.970238</td>\n",
       "      <td>0.138306</td>\n",
       "      <td>-0.046640</td>\n",
       "      <td>0.001991</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>58</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>9</td>\n",
       "      <td>2.196429</td>\n",
       "      <td>3.578542</td>\n",
       "      <td>1.353759</td>\n",
       "      <td>0.040818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59.0~75.0</td>\n",
       "      <td>18</td>\n",
       "      <td>6</td>\n",
       "      <td>18</td>\n",
       "      <td>4.392857</td>\n",
       "      <td>0.587979</td>\n",
       "      <td>0.437468</td>\n",
       "      <td>0.007561</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         AGE  count  hit  all  expected_cnt  chi_sequare       woe        iv\n",
       "0  21.0~22.0      2    1    2      0.488095     0.536876  1.130615  0.006224\n",
       "1  23.0~27.0      8    1    8      1.952381     0.464576 -0.815295  0.008351\n",
       "2  28.0~57.0    467  110  467    113.970238     0.138306 -0.046640  0.001991\n",
       "3         58      9    5    9      2.196429     3.578542  1.353759  0.040818\n",
       "4  59.0~75.0     18    6   18      4.392857     0.587979  0.437468  0.007561"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_count = handerCol(df,col,target,5).head()\n",
    "df_count.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 分箱合并"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_box(data,col,num=5):\n",
    "    df_cnt_tmp=handerCol(data,col,target,num)\n",
    "    df_cnt_tmp['t']=col\n",
    "    df_cnt_tmp['iv_mount']=df_cnt_tmp['iv'].sum()\n",
    "    return df_cnt_tmp.rename(columns={col:\"box\"})\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "连续型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "boxs = []\n",
    "for c in numerical:\n",
    "    boxs.append(split_box(data,c,5))\n",
    "#split_box_rst=pd.concat(boxs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "二值型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "for c in binary:\n",
    "    boxs.append(split_box(data,c,5))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "one-hot编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "for c in data_dummies.columns:\n",
    "    boxs.append(split_box(data_dummies,c,5))\n",
    "split_box_rst=pd.concat(boxs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 筛选处iv值大于0.02的变量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "split_box_seleced = split_box_rst.loc[split_box_rst['iv_mount'].map(lambda r:r>0.02)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['AGE',\n",
       " 'CURR_FREEZE_VALUE',\n",
       " 'EDU_EXPERIENCE_70',\n",
       " 'OCCUPATION_TYPE_4',\n",
       " 'OCCUPATION_1',\n",
       " 'OCCUPATION_TYPE_z',\n",
       " 'OCCUPATION_TYPE_1',\n",
       " 'EDU_EXPERIENCE_20',\n",
       " 'EDU_EXPERIENCE_99',\n",
       " 'EDU_EXPERIENCE_10',\n",
       " 'OCCUPATION_TYPE_3',\n",
       " 'OCCUPATION_TYPE_5']"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_X_cols = list(split_box_seleced['t'].value_counts().index)\n",
    "train_X_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>box</th>\n",
       "      <th>count</th>\n",
       "      <th>hit</th>\n",
       "      <th>all</th>\n",
       "      <th>expected_cnt</th>\n",
       "      <th>chi_sequare</th>\n",
       "      <th>woe</th>\n",
       "      <th>iv</th>\n",
       "      <th>t</th>\n",
       "      <th>iv_mount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>21.0~22.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.488095</td>\n",
       "      <td>0.536876</td>\n",
       "      <td>1.130615</td>\n",
       "      <td>0.006224</td>\n",
       "      <td>AGE</td>\n",
       "      <td>0.064946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>23.0~27.0</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>1.952381</td>\n",
       "      <td>0.464576</td>\n",
       "      <td>-0.815295</td>\n",
       "      <td>0.008351</td>\n",
       "      <td>AGE</td>\n",
       "      <td>0.064946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>28.0~57.0</td>\n",
       "      <td>467</td>\n",
       "      <td>110</td>\n",
       "      <td>467</td>\n",
       "      <td>113.970238</td>\n",
       "      <td>0.138306</td>\n",
       "      <td>-0.046640</td>\n",
       "      <td>0.001991</td>\n",
       "      <td>AGE</td>\n",
       "      <td>0.064946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>58</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>9</td>\n",
       "      <td>2.196429</td>\n",
       "      <td>3.578542</td>\n",
       "      <td>1.353759</td>\n",
       "      <td>0.040818</td>\n",
       "      <td>AGE</td>\n",
       "      <td>0.064946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59.0~75.0</td>\n",
       "      <td>18</td>\n",
       "      <td>6</td>\n",
       "      <td>18</td>\n",
       "      <td>4.392857</td>\n",
       "      <td>0.587979</td>\n",
       "      <td>0.437468</td>\n",
       "      <td>0.007561</td>\n",
       "      <td>AGE</td>\n",
       "      <td>0.064946</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0~10000.0</td>\n",
       "      <td>386</td>\n",
       "      <td>105</td>\n",
       "      <td>386</td>\n",
       "      <td>94.202381</td>\n",
       "      <td>1.237639</td>\n",
       "      <td>0.146221</td>\n",
       "      <td>0.016980</td>\n",
       "      <td>CURR_FREEZE_VALUE</td>\n",
       "      <td>0.248174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15000.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.244048</td>\n",
       "      <td>2.341609</td>\n",
       "      <td>5.042638</td>\n",
       "      <td>0.040732</td>\n",
       "      <td>CURR_FREEZE_VALUE</td>\n",
       "      <td>0.248174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20000.0</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2.196429</td>\n",
       "      <td>2.196429</td>\n",
       "      <td>-5.671780</td>\n",
       "      <td>0.133518</td>\n",
       "      <td>CURR_FREEZE_VALUE</td>\n",
       "      <td>0.248174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>30000.0~98000.0</td>\n",
       "      <td>72</td>\n",
       "      <td>12</td>\n",
       "      <td>72</td>\n",
       "      <td>17.571429</td>\n",
       "      <td>1.766551</td>\n",
       "      <td>-0.478823</td>\n",
       "      <td>0.028691</td>\n",
       "      <td>CURR_FREEZE_VALUE</td>\n",
       "      <td>0.248174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100000.0~500000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>5</td>\n",
       "      <td>36</td>\n",
       "      <td>8.785714</td>\n",
       "      <td>1.631243</td>\n",
       "      <td>-0.693934</td>\n",
       "      <td>0.028253</td>\n",
       "      <td>CURR_FREEZE_VALUE</td>\n",
       "      <td>0.248174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>502</td>\n",
       "      <td>123</td>\n",
       "      <td>502</td>\n",
       "      <td>122.511905</td>\n",
       "      <td>0.001945</td>\n",
       "      <td>0.005263</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>EDU_EXPERIENCE_10</td>\n",
       "      <td>0.021566</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.488095</td>\n",
       "      <td>0.488095</td>\n",
       "      <td>-4.167702</td>\n",
       "      <td>0.021539</td>\n",
       "      <td>EDU_EXPERIENCE_10</td>\n",
       "      <td>0.021566</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>476</td>\n",
       "      <td>120</td>\n",
       "      <td>476</td>\n",
       "      <td>116.166667</td>\n",
       "      <td>0.126495</td>\n",
       "      <td>0.043176</td>\n",
       "      <td>0.001780</td>\n",
       "      <td>EDU_EXPERIENCE_20</td>\n",
       "      <td>0.042580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>28</td>\n",
       "      <td>3</td>\n",
       "      <td>28</td>\n",
       "      <td>6.833333</td>\n",
       "      <td>2.150407</td>\n",
       "      <td>-0.989649</td>\n",
       "      <td>0.040800</td>\n",
       "      <td>EDU_EXPERIENCE_20</td>\n",
       "      <td>0.042580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>347</td>\n",
       "      <td>75</td>\n",
       "      <td>347</td>\n",
       "      <td>84.684524</td>\n",
       "      <td>1.107522</td>\n",
       "      <td>-0.157699</td>\n",
       "      <td>0.016425</td>\n",
       "      <td>EDU_EXPERIENCE_70</td>\n",
       "      <td>0.048762</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>157</td>\n",
       "      <td>48</td>\n",
       "      <td>157</td>\n",
       "      <td>38.315476</td>\n",
       "      <td>2.447836</td>\n",
       "      <td>0.310468</td>\n",
       "      <td>0.032337</td>\n",
       "      <td>EDU_EXPERIENCE_70</td>\n",
       "      <td>0.048762</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>409</td>\n",
       "      <td>109</td>\n",
       "      <td>409</td>\n",
       "      <td>99.815476</td>\n",
       "      <td>0.845114</td>\n",
       "      <td>0.118180</td>\n",
       "      <td>0.011674</td>\n",
       "      <td>EDU_EXPERIENCE_99</td>\n",
       "      <td>0.073387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>95</td>\n",
       "      <td>14</td>\n",
       "      <td>95</td>\n",
       "      <td>23.184524</td>\n",
       "      <td>3.638439</td>\n",
       "      <td>-0.624777</td>\n",
       "      <td>0.061714</td>\n",
       "      <td>EDU_EXPERIENCE_99</td>\n",
       "      <td>0.073387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>497</td>\n",
       "      <td>119</td>\n",
       "      <td>497</td>\n",
       "      <td>121.291667</td>\n",
       "      <td>0.043298</td>\n",
       "      <td>-0.025156</td>\n",
       "      <td>0.000620</td>\n",
       "      <td>OCCUPATION_1</td>\n",
       "      <td>0.035576</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>1.708333</td>\n",
       "      <td>3.074187</td>\n",
       "      <td>1.418297</td>\n",
       "      <td>0.034956</td>\n",
       "      <td>OCCUPATION_1</td>\n",
       "      <td>0.035576</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>488</td>\n",
       "      <td>115</td>\n",
       "      <td>488</td>\n",
       "      <td>119.095238</td>\n",
       "      <td>0.140820</td>\n",
       "      <td>-0.046031</td>\n",
       "      <td>0.002027</td>\n",
       "      <td>OCCUPATION_TYPE_1</td>\n",
       "      <td>0.051823</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>8</td>\n",
       "      <td>16</td>\n",
       "      <td>3.904762</td>\n",
       "      <td>4.295006</td>\n",
       "      <td>1.130615</td>\n",
       "      <td>0.049796</td>\n",
       "      <td>OCCUPATION_TYPE_1</td>\n",
       "      <td>0.051823</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>453</td>\n",
       "      <td>117</td>\n",
       "      <td>453</td>\n",
       "      <td>110.553571</td>\n",
       "      <td>0.375894</td>\n",
       "      <td>0.075678</td>\n",
       "      <td>0.005247</td>\n",
       "      <td>OCCUPATION_TYPE_3</td>\n",
       "      <td>0.066554</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>6</td>\n",
       "      <td>51</td>\n",
       "      <td>12.446429</td>\n",
       "      <td>3.338825</td>\n",
       "      <td>-0.884288</td>\n",
       "      <td>0.061307</td>\n",
       "      <td>OCCUPATION_TYPE_3</td>\n",
       "      <td>0.066554</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>452</td>\n",
       "      <td>116</td>\n",
       "      <td>452</td>\n",
       "      <td>110.309524</td>\n",
       "      <td>0.293551</td>\n",
       "      <td>0.067094</td>\n",
       "      <td>0.004106</td>\n",
       "      <td>OCCUPATION_TYPE_4</td>\n",
       "      <td>0.048790</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>52</td>\n",
       "      <td>7</td>\n",
       "      <td>52</td>\n",
       "      <td>12.690476</td>\n",
       "      <td>2.551639</td>\n",
       "      <td>-0.730137</td>\n",
       "      <td>0.044684</td>\n",
       "      <td>OCCUPATION_TYPE_4</td>\n",
       "      <td>0.048790</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>275</td>\n",
       "      <td>53</td>\n",
       "      <td>275</td>\n",
       "      <td>67.113095</td>\n",
       "      <td>2.967818</td>\n",
       "      <td>-0.301770</td>\n",
       "      <td>0.045804</td>\n",
       "      <td>OCCUPATION_TYPE_5</td>\n",
       "      <td>0.092888</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>229</td>\n",
       "      <td>70</td>\n",
       "      <td>229</td>\n",
       "      <td>55.886905</td>\n",
       "      <td>3.563974</td>\n",
       "      <td>0.310206</td>\n",
       "      <td>0.047084</td>\n",
       "      <td>OCCUPATION_TYPE_5</td>\n",
       "      <td>0.092888</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>426</td>\n",
       "      <td>110</td>\n",
       "      <td>426</td>\n",
       "      <td>103.964286</td>\n",
       "      <td>0.350407</td>\n",
       "      <td>0.075353</td>\n",
       "      <td>0.004891</td>\n",
       "      <td>OCCUPATION_TYPE_z</td>\n",
       "      <td>0.035973</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>78</td>\n",
       "      <td>13</td>\n",
       "      <td>78</td>\n",
       "      <td>19.035714</td>\n",
       "      <td>1.913763</td>\n",
       "      <td>-0.478823</td>\n",
       "      <td>0.031082</td>\n",
       "      <td>OCCUPATION_TYPE_z</td>\n",
       "      <td>0.035973</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 box  count  hit  all  expected_cnt  chi_sequare       woe  \\\n",
       "0          21.0~22.0      2    1    2      0.488095     0.536876  1.130615   \n",
       "1          23.0~27.0      8    1    8      1.952381     0.464576 -0.815295   \n",
       "2          28.0~57.0    467  110  467    113.970238     0.138306 -0.046640   \n",
       "3                 58      9    5    9      2.196429     3.578542  1.353759   \n",
       "4          59.0~75.0     18    6   18      4.392857     0.587979  0.437468   \n",
       "0        0.0~10000.0    386  105  386     94.202381     1.237639  0.146221   \n",
       "1            15000.0      1    1    1      0.244048     2.341609  5.042638   \n",
       "2            20000.0      9    0    9      2.196429     2.196429 -5.671780   \n",
       "3    30000.0~98000.0     72   12   72     17.571429     1.766551 -0.478823   \n",
       "4  100000.0~500000.0     36    5   36      8.785714     1.631243 -0.693934   \n",
       "0                  0    502  123  502    122.511905     0.001945  0.005263   \n",
       "1                  1      2    0    2      0.488095     0.488095 -4.167702   \n",
       "0                  0    476  120  476    116.166667     0.126495  0.043176   \n",
       "1                  1     28    3   28      6.833333     2.150407 -0.989649   \n",
       "0                  0    347   75  347     84.684524     1.107522 -0.157699   \n",
       "1                  1    157   48  157     38.315476     2.447836  0.310468   \n",
       "0                  0    409  109  409     99.815476     0.845114  0.118180   \n",
       "1                  1     95   14   95     23.184524     3.638439 -0.624777   \n",
       "0                  0    497  119  497    121.291667     0.043298 -0.025156   \n",
       "1                  1      7    4    7      1.708333     3.074187  1.418297   \n",
       "0                  0    488  115  488    119.095238     0.140820 -0.046031   \n",
       "1                  1     16    8   16      3.904762     4.295006  1.130615   \n",
       "0                  0    453  117  453    110.553571     0.375894  0.075678   \n",
       "1                  1     51    6   51     12.446429     3.338825 -0.884288   \n",
       "0                  0    452  116  452    110.309524     0.293551  0.067094   \n",
       "1                  1     52    7   52     12.690476     2.551639 -0.730137   \n",
       "0                  0    275   53  275     67.113095     2.967818 -0.301770   \n",
       "1                  1    229   70  229     55.886905     3.563974  0.310206   \n",
       "0                  0    426  110  426    103.964286     0.350407  0.075353   \n",
       "1                  1     78   13   78     19.035714     1.913763 -0.478823   \n",
       "\n",
       "         iv                  t  iv_mount  \n",
       "0  0.006224                AGE  0.064946  \n",
       "1  0.008351                AGE  0.064946  \n",
       "2  0.001991                AGE  0.064946  \n",
       "3  0.040818                AGE  0.064946  \n",
       "4  0.007561                AGE  0.064946  \n",
       "0  0.016980  CURR_FREEZE_VALUE  0.248174  \n",
       "1  0.040732  CURR_FREEZE_VALUE  0.248174  \n",
       "2  0.133518  CURR_FREEZE_VALUE  0.248174  \n",
       "3  0.028691  CURR_FREEZE_VALUE  0.248174  \n",
       "4  0.028253  CURR_FREEZE_VALUE  0.248174  \n",
       "0  0.000028  EDU_EXPERIENCE_10  0.021566  \n",
       "1  0.021539  EDU_EXPERIENCE_10  0.021566  \n",
       "0  0.001780  EDU_EXPERIENCE_20  0.042580  \n",
       "1  0.040800  EDU_EXPERIENCE_20  0.042580  \n",
       "0  0.016425  EDU_EXPERIENCE_70  0.048762  \n",
       "1  0.032337  EDU_EXPERIENCE_70  0.048762  \n",
       "0  0.011674  EDU_EXPERIENCE_99  0.073387  \n",
       "1  0.061714  EDU_EXPERIENCE_99  0.073387  \n",
       "0  0.000620       OCCUPATION_1  0.035576  \n",
       "1  0.034956       OCCUPATION_1  0.035576  \n",
       "0  0.002027  OCCUPATION_TYPE_1  0.051823  \n",
       "1  0.049796  OCCUPATION_TYPE_1  0.051823  \n",
       "0  0.005247  OCCUPATION_TYPE_3  0.066554  \n",
       "1  0.061307  OCCUPATION_TYPE_3  0.066554  \n",
       "0  0.004106  OCCUPATION_TYPE_4  0.048790  \n",
       "1  0.044684  OCCUPATION_TYPE_4  0.048790  \n",
       "0  0.045804  OCCUPATION_TYPE_5  0.092888  \n",
       "1  0.047084  OCCUPATION_TYPE_5  0.092888  \n",
       "0  0.004891  OCCUPATION_TYPE_z  0.035973  \n",
       "1  0.031082  OCCUPATION_TYPE_z  0.035973  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "split_box_seleced"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_box_woe_range(row):\n",
    "    \"\"\"Split one binning row into its bin bounds followed by its WOE.\n",
    "\n",
    "    Args:\n",
    "        row: Row-like object with 'box' and 'woe' entries.\n",
    "\n",
    "    Returns:\n",
    "        list: [value, woe] when 'box' holds a single value, or\n",
    "        [low, upper, woe] when 'box' is written as 'low~upper'. The bounds\n",
    "        are strings; the WOE is passed through unchanged.\n",
    "    \"\"\"\n",
    "    bounds = str(row['box']).split(\"~\")\n",
    "    bounds.append(row['woe'])\n",
    "    return bounds\n",
    "\n",
    "\n",
    "# NOTE: this loop writes the WOE values back into train_X, so running the\n",
    "# cell a second time would try to re-encode already encoded columns.\n",
    "for col in train_X_cols:\n",
    "    col_bins = split_box_seleced[split_box_seleced['t']==col]\n",
    "    if col_bins.empty:\n",
    "        continue  # no bins recorded for this feature: leave it untouched\n",
    "    # Match every bin against an untouched float snapshot of the column.\n",
    "    # Testing the column while it is being overwritten lets a WOE written\n",
    "    # for one bin fall inside a later bin's range and be replaced again.\n",
    "    raw_values = train_X[col].astype(float)\n",
    "    woe_values = raw_values.copy()\n",
    "    for _, bin_row in col_bins.iterrows():\n",
    "        lst = get_box_woe_range(bin_row)\n",
    "        if len(lst) == 2:  # single-value bin\n",
    "            r, woe = lst\n",
    "            in_bin = raw_values == float(r)\n",
    "        elif len(lst) == 3:  # closed interval low~upper\n",
    "            low, upper, woe = lst\n",
    "            in_bin = (raw_values >= float(low)) & (raw_values <= float(upper))\n",
    "        else:\n",
    "            continue  # unrecognised 'box' format\n",
    "        woe_values[in_bin] = woe\n",
    "    # Rows that fall outside every bin keep their original value.\n",
    "    train_X[col] = woe_values\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Restrict train_X to the modelling columns listed in train_X_cols.\n",
    "tain_X_final = train_X.loc[:, train_X_cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import roc_curve, auc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(352, 12), (152, 12), (352,), (152,)]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Hold out 30% of the rows for evaluation; random_state pins the shuffle.\n",
    "x_train, x_test, y_train, y_test = train_test_split(\n",
    "    tain_X_final, train_Y, test_size=0.3, random_state=0\n",
    ")\n",
    "# Show the shape of each split as a quick sanity check.\n",
    "[part.shape for part in (x_train, x_test, y_train, y_test)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7105263157894737"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit a logistic regression with default parameters on the training split.\n",
    "model = LogisticRegression()\n",
    "clf = model.fit(X=x_train, y=y_train)\n",
    "# Score the fitted model on the held-out split.\n",
    "clf.score(X=x_test, y=y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
       "      dtype=int64)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Hard class labels predicted for the held-out rows.\n",
    "clf.predict(X=x_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Continuous decision scores (not hard labels) are what the ROC curve is built from.\n",
    "y_pred = clf.decision_function(X=x_test)\n",
    "fpr, tpr, threshold = roc_curve(y_true=y_test, y_score=y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.669298605414274"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Area under the ROC curve, computed from the (fpr, tpr) points.\n",
    "roc_auc = auc(x=fpr, y=tpr)\n",
    "roc_auc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEWCAYAAAB42tAoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xd4FGXXx/HvIfSqICrSkRZ6Cb33IgIKUqUXxYJix/JaH3t7EBRBEBsC0lEUpJcHhCAdpLfQQQg1IeW8f8wSQkjCJmSzm835XFcus7uzM4cx2V/mnplzi6pijDHGJCSDtwswxhjj2ywojDHGJMqCwhhjTKIsKIwxxiTKgsIYY0yiLCiMMcYkyoLCGGNMoiwojDHGJMqCwvgVEdkvIpdF5IKIHBORCSKSM9brdUVkkYicF5FQEZkjIuXirCO3iHwuIgdd69ntenxH6v+LjPE+Cwrjj+5X1ZxAFaAqMBxAROoA84FZwD1AcWAjsFJESriWyQwsBMoDrYHcQF3gNFDT04WLw34vjU+xH0jjt1T1GDAPJzAAPgS+V9X/qup5Vf1XVV8FVgNvuJbpDRQBHlDVbaoaraonVPVtVZ2b2PZEpLCITBeRkyJyWkRGup5/Q0R+jLVcMRFREcnoerxERP4jIiuBS8DLIhIcZ93DRGS26/ssIvKx64jnuIiMFpFst7a3jEmYBYXxWyJSCGgD7BaR7DhHBr/Es+gUoIXr++bAH6p6IYnbCgB+BQ4AxYCCwKQkrKIXMBjIBXwBlBGRUrFe7wFMdH3/AVAaJwBLurb1f0mp15iksKAw/mimiJwHDgEngNeBvDg/70fjWf4ocPX8Q74ElrmZmjjDWc+r6kVVDVPVFUl4/wRV3aqqkaoaijM81h3AFRhlgdkiIsAgYJjriOg88C7QLRk1G+MWCwrjjzqqai6gMc4H7B3AGSAaKBDP8gWAU67vTyewzM0UBg6oamQy3gtOqMU2EVdQ4BxNzFTVS0B+IDuwTkTOishZ4A/X88Z4hAWF8VuquhSYAHysqheBVcBD8SzaBecENsACoJWI5Eji5g4BRa6ed4jjIs6H+1V3x1dunMfzgTtEpApOYFwddjoFXAbKq+ptrq88rpP3xniEBYXxd58DLVwfuC8BfURkqIjkEpHbReQdoA7wpmv5H3A+9KeJSFkRySAi+UTkZRFpm8h21uAMWb0vIjlEJKuI1HO9tgFoKCJFRCQPrquwEuM6MpkKfIQzbPan6/loYCzwmYjcCSAiBUWkVVJ2ijFJYUFh/JqqngS+B15znTNoBTyI86F+AOfy2fqqusu1fDjOCe1/cD6cz+GEwB3AX4lsJwq4H+fk8kEgBOjqeu1PYDKwCViHc9LbHRNdtfwSZ0jrRWA3sFpEzuEcBZVxc53GJJnYDHfGGGMS47EjChEZLyInRGRLAq+LiIxw3fW6SUSqeaoWY4wxyefJoacJOHe2JqQNUMr1NRj4yoO1GHPLXOcYLiTwVcTb9RnjKfFdoZEiVHWZiBRLZJEOOHfJKs5Y620iUkBVk3MNuzEep6oHAbu6yKQ7HgsKNxTk+mvHQ1zP3RAUIjIY56iDHDlyVC9btmyqFGiMMW6LjoBTW0GjvF3JdQ6cycPZy1mJjD5+SlWTdb+NN4NC4nku3jPrqjoGGAMQFBSkwcHB8S1mjDHeoQozO8DBndBlJeQs6OVynI9SEeGrb7Zz4uRl3nivw4Hkrs+bQRGCczfrVYWAI16qxRhjkm/7j7B3DjT6BArU8mophw+fY8iQ3+jatTw9e1ZiyLBCALzxXvLX6c37KGYDvV1XP9UGQu38hDEmzblwBBYNhXvqQbWnvFaGqjJ27DrKlfuSBQv2cuHClRRbt8eOKETkZ5xeO3eISAhOY7ZMAKo6GpgLtMW5cegS0M9TtRhjjFtClsOGUaDR7r/n3+0QFQatxkOGAM/Vlog9e/5l0KA5LF68
nyZNijF27P3ce2/eFFu/J6966n6T1xV43FPbN8aYJLl0AmY/6IRE9rvcf59kgBZjIW9pz9V2E5s3n2DduqOMGdOOgQOr4TQZTjnePEdhjDG+Y+HjcOUc9FoP+crdfHkv27LlBH//fZTevSvTsWNZ9u4dSr582W/+xmSwXk/GGLNjCuycCnXe9PmQuHIlijfeWEK1al/zyiuLCAtz2oB5KiTAjiiMMendpRPO0cTdNaHGc96uJlF//RXCgAGz2br1JA8/XInPPmtF1qye/xi3oDDGpF+qsOAxZ8ip9beQwXc/Eg8fPkeDBt9y1105+fXX7tx3X+qdE/HdvWKMMZ62YzLsmgYN3vfZIaedO09TunQ+ChbMzeTJnWnWrAS5c2dJ1RrsHIUxJn26eBwWPuEMOQU96+1qbnD2bBiDB8+hbNmRLFvm3FT9wAOBqR4SYEcUxpj0SBUWDIGIC9B6gs8NOc2evYMhQ37j2LELPP98XWrUuMer9fjW3jHGmNTwz8+wewY0+ADyBXq7musMHDibcePWU7Hincya1Y2gIO+GBFhQGGPSC1U4sADWfwF7f3V6MvnIkFPsJn5BQfdQtGgeXnyxPpkze+dO77gsKIwx/u3KBdj2Pawf6bTbyJYfar8C1Z72WsuN2A4dCuXRR3+jW7fy9OpVmUcfDfJ2STewoDDG+Kczu52+TVvGO5e/3lUdWn8HZbpAxqzero7oaOXrr4N58cUFREUpDzzgu/PsWFAYY/yHRruGl0bA3rnOEUPph6Dqk1CgNqRwD6Tk2rXrNAMHzmHZsgM0b16CMWPaUbz47d4uK0EWFMaYtO/Kedj6vXP+4cwOyH4n1H4NKj8COb1/MjiubdtOsmnTccaPb0/fvlVSvIlfSrOgMMakXWd2uYaXvnWGl+6uAW1+cI4iMqb+/QaJ2bjxGBs2HKNPnyp06FCWvXuLcvvt2bxdllssKIwxaYtGw/75ztHDvrmQIZNz3qHqk16fXS4+4eGRvPPOMt5/fyUFCuSka9cKZM2aMc2EBFhQGGPSivBzsPU72DASzuyEHHdDnTec4aUcd3u7unitWnWIAQNms337KXr3rsynn7ZMlSZ+KS3tVWyMSV/+3emEw9YJzrmIArWg7U9QujMEZPZ2dQk6fPgcjRpN4O67czJ3bg/atCnl7ZKSzYLCGOMZl07Bqjfh1KbkryPyMhxb6xpe6uoaXqqZcjV6wPbtJwkMzE/BgrmZMuUhmjUrTq5cvnW+JKmsKaAxJmVFR8GGr+Db0rBptHNHtGRI3lemnFD3TRh8ENr+4NMhcebMZfr3n0W5cl+yfLnTxK9jx7JpPiTAjiiMMSnpyGpnEqATf0PhJtBspM+2705JM2Zs57HH5nLy5EWGD69PjRoFvV1SirKgMMbcuksnYflLzl3QOe+B+yY5VyL5+P0BKaF//1l8++0GqlS5m99+60G1agW8XVKKs6AwxiRfdBRsHA0rX3Vadgc9D3Veg8y5vF2ZR8Vu4le7diFKlcrLc8/VJVMm7/eO8gQLCmNM8hxZ5RpmWg9FmkLTkT7XstsTDhw4yyOP/EqPHhXp3bsygwdX93ZJHmcns40xSXPpBPzRD36u63zfbjJ0XuD3IREdrYwatYYKFb5ixYqDREREebukVGNHFMYY90RHOlcz/e81iLgENV6E2q9C5pzerszjduw4xcCBc1ix4iAtW97L11+3o1ix27xdVqqxoDDG3Nzhlc4w08mNUKQ5NP0C8vluW+yUtmPHabZuPcGECR3o3buyzzfxS2kWFMaY6104AsfXOd+rwu7pTuuMnIXg/l+gVKd0cTXT+vVH2bDhGP36VaV9+zLs3fsUt93m/XksvMGCwhhzvT8fcaYKvSpDJqj5kjPMlCmH9+pKJWFhkbz11lI+/HAlBQvmpnv3imTNmjHdhgRYUBhj4oq8BPkrQ6txzuMc90BO/7s3ID4rVx5kwIDZ7Nhxmn79qvDJJ2mziV9Ksz1gjLlR5lzO1KHp
yOHD52jS5DsKFszNvHkP07Llvd4uyWdYUBhj0rVt205SrpzTxG/atC40aVKcnDl9tyutN9h9FMaYdOnffy/Tt+9Mypf/kmXLnCZ+999fxkIiHnZEYYxJd6ZN28bjj8/l9OnLvPJKA2rW9K8mfinNgsIYk6707TuT777bSLVqBfjjj4epUsU3Z8fzJRYUxhi/F7uJX926hQkMvINnn61Lxow2+u4Oj+4lEWktIjtEZLeIvBTP60VEZLGIrBeRTSLS1pP1GGPSn337ztCy5Y98//1GAAYPrs6LL9a3kEgCj+0pEQkARgFtgHJAdxGJO4PJq8AUVa0KdAO+9FQ9xphERIbBoaXwvzfh1BZvV5MioqKiGTHiLypU+IrVq0NwHVSYZPDk0FNNYLeq7gUQkUlAB2BbrGUUyO36Pg9wxIP1GGOuigyDo6vh0BLn6+hqiAoHBO6sCpWHeLnAW7N9+0kGDJjNqlUhtGlTktGj21GkSB5vl5VmeTIoCgKHYj0OAWrFWeYNYL6IPAnkAJrHtyIRGQwMBihSpEiKF2qM30soGCSDEwxVnoDCjaFgfcia9rui7t79Lzt2nOaHHx6gZ8+K6a6JX0rzZFDE938m7sFfd2CCqn4iInWAH0SkgqpGX/cm1THAGICgoCA7gDTmZiIuXwuGkCWuYLjiCoZqfhcMAOvWHWHjxuP071+V++8vw759T5E7dxZvl+UXPBkUIUDhWI8LcePQ0gCgNYCqrhKRrMAdwAkP1mVM2nL5tDPNaGJUIXRfwsFQdei1YMjiX0Mwly9H8OabS/n44/9RuHAeevRwmvhZSKQcTwbFWqCUiBQHDuOcrO4RZ5mDQDNggogEAlmBkx6syZi0IzoKVr/tfF1/kJ0wyeD0aKr6FBRu5JfBENuyZQcYOHA2u3b9y4ABVfn4Y2vi5wke26OqGikiTwDzgABgvKpuFZG3gGBVnQ08C4wVkWE4w1J9Ve3aBGO4eAx+6wGHFkNgTyjS7ObvyX6XKxhy33xZP3D48DmaNfuewoVzs2BBL5o1K+HtkvyWpLXP5aCgIA0ODvZ2GcZ4zoEFMLcnXDkPzb6ECn29XZFP2bz5OBUr3gXAr7/upEmTYuTIYf2ZbkZE1qlqUHLea3ecGOMroqNg5eswtSVkuwN6rrWQiOXUqUv06jWDSpVGxzTxa9eutIVEKrDBPGO85cwu5+TzVf9MdB6X7wfNvkgXs8m5Q1X55ZdtPPHEXM6cCeP11xtRq5Y18UtNFhTGpDZV2DoBFj4OkZevPZ8xO7SeAOX7eKsyn9Snz0x++GETQUH3sHBh+5hhJ5N6LCiMSU1XLsDCx2DbD1CkKTQd5cwmB87VSZlzerc+HxG7iV+jRkWpVOkunn66tvVn8hILCmNSy6ktMOch+HcH1H0Tar0CGQK8XZXP2bv3DIMGzeHhhyvSr19VBgyo5u2S0j2LZ2M8TRU2j4OfakL4WXhoIdT5PwuJOKKiovn889VUrPgVa9ceJkMGa7vhK+yIwhhPunIBFjwK23+CIs2h7Y+Qw8bY49q27ST9+8/ir78Oc999pRg9uh2FCqWP+0HSAgsKYzzl5CaY0wXO7oK6b0Gtl+0oIgH79p1hz54zTJz4IN26VbAmfj7GgsKYlKYKm7+BxUMhy23OUFPhxt6uyuesXXuYDRuOMWhQde67rzR79w4lVy7rz+SL7ByFMSnpynmY+zD8ORgKNoDeGy0k4rh0KYLnnptP7drjeO+9FYSFRQJYSPgwO6IwJqWc2Ai/doGzu6H+f6DmS06TPhNjyZL9DBw4mz17zvDII9X54IPm1sQvDbD/Q8bcKlXYNAYWPwXZ8kGXxVCoober8jkhIedo0eIHihbNw6JFvWnSpLi3SzJusqAw5laEn3OGmXZMhmKtoM0PkD2/t6vyKRs3HqNy5bspVCg3s2Z1o3HjYmTPnsnbZZkksONiY5Lr+Hr4sTrs/AXqvwsPzrWQiOXkyYv06DGNKlW+ZunS/QC0bVvK
QiINsiMKY5JKFTaOhiXDnC6vXZZAoQberspnqCqTJm1h6NA/CA0N4803G1OnTuGbv9H4LLeCQkQyA0VUdbeH6zHGt4WHwvzBsHMKFGsNbb63o4g4evWawU8/baZWrYKMG9ee8uXv9HZJ5hbdNChE5D7gUyAzUFxEqgCvq+oDni7OGJ8Rfg42jITgT5ywaPA+1HjermpyiY5WRJwmfk2aFKN69QIMHVqLgADbP/7AnSOKt4BawGIAVd0gIiU9WpUxviL8HKz/AtZ9AmFnoEQ7p6HfXdao7qrdu/9l0KA59OpVif79rYmfP3In7iNU9Wyc59LW/KnGJFX4OVj9H/imGKx8Fe6p58w498AcCwmXyMhoPv74f1Ss+BXr1x8lc2ZrT+Kv3Dmi2C4iXYAMIlIceApY7dmyjPGS8HOwfgSs+9R1BHG/0+n17mRNNey3tmw5Qb9+swgOPkKHDmX48sv7uOeeXN4uy3iIO0HxBPB/QDQwHZgHDPdkUcakOlVY+xGsff9aQNR9He6q7u3KfNLBg6EcOHCWSZM60aVLeWvi5+fk6kxSCS4g8qCqTr/Zc6klKChIg4ODvbFp4882j4f5A6B4G6j3tgVEPP76K4SNG48zeLCzby5cuELOnJm9XJVxl4isU9VkHRq7c47i1XieeyU5GzPGJ5075NwTUagRPPCrhUQcFy9e4Zln5lGnzjg+/HAl4eFOEz8LifQjwaEnEWkFtAYKisinsV7KjTMMZUzapwp/DoLoSGg13i53jWPRon0MGjSHvXvPMGRIEO+/35wsWew+3fQmsf/jJ4AtQBiwNdbz54GXPFmUMalmy3jYPw+afgG3lfB2NT4lJOQcrVr9SPHit7F0aV8aNizq7ZKMlyQYFKq6HlgvIj+palgq1mRM6ji1FZY848wXUeUxb1fjM9avP0rVqgUoVCg3c+Z0p1GjomTLZv2Z0jN3jrMLisgkEdkkIjuvfnm8MmM8KWQ5TKoPmXLYkJPL8eMX6Np1KtWqjYlp4te6dUkLCeNWUEwAvgUEaANMASZ5sCZjPGvXDJjaArLfBT1WQZ70PS+CqvLjj5soV+5LZs78h3feaULdutbEz1zjzlmp7Ko6T0Q+VtU9wKsistzThRnjERtHw8LH4e6azhVO2fJ5uyKv69FjOpMmbaFOnUKMG9eewEBrcmiu505QhItzN80eEXkUOAxYO0iTtqjC/16H1W87/ZraTYZM2b1dldfEbuLXsmUJ6tQpxOOP17AmfiZe7gTFMCAnMBT4D5AH6O/JooxJUdGR8OejsGUcVBgALUZDhvR7iefOnacZNGgOvXtXYsCAavTrV9XbJRkfd9PfFlX9y/XteaAXgIgU8mRRxqSYiEvwa1fY+yvUfs3p/JpO201ERkbz6aereP31JWTNmtFOUhu3JRoUIlIDKAisUNVTIlIeeBFoClhYGN926RTMvB+O/gXNvoQqQ7xdkdds2nSc/v1nsW7dUR54oCyjRrWlQAFr4mfck9id2e8BnYCNOCewZ+B0jv0AeDR1yjMmmUL3w7TWcG4/tJ8KpR70dkVeFRJyjkOHzvHLLw/RqVOgNfEzSZLYEUUHoLKqXhaRvMAR1+Md7q5cRFoD/wUCgG9U9f14lukCvIEzx8VGVe2RhPqNudGJjTC9DURehs5/ptv5rP/3v0Ns2nScRx8Nom3bUuzdO5QcOaw/k0m6xC5xCFPVywCq+i/wTxJDIgAYhXPvRTmgu4iUi7NMKZyW5fVUtTzwdBLrN+Z6BxfD5IbODXTdlqfLkLhw4QpPPfU79euP55NPVsU08bOQMMmV2BFFCRG52kpcgGKxHqOqNzuWrwnsVtW9ACIyCecoZVusZQYBo1T1jGudJ5JYvzHX7JgCv/eC20rCg39A7vR309j8+XsYPHgOBw+G8vjjNXj33WbWxM/cssR+gjrFeTwyiesuCByK9TgEZ+7t2EoDiMhKnOGpN1T1j7grEpHBwGCAIkWKJLEMky78/QUsfgoK
1oMOsyBbXm9XlOoOHQrlvvsmcu+9t7NsWT/q17ffFZMyEmsKuPAW1x3f2bK4syRlBEoBjXGuolouIhXiztGtqmOAMeBMXHSLdRl/ogorXoY170PJjtB2ImTK5u2qUtW6dUeoXv0eChfOw9y5PWjQoChZs9pRhEk5nvxpCgFiH/sXwjkhHneZ1aoaAewTkR04wbHWg3WZ1HDuAKwf6ZxQ9qSze2D/H1DpEWg2CjIEeHZ7PuTYsQs8+eTvTJ26jSVL+tCoUTFatLjX22UZP+TJoFgLlBKR4jhtP7oBca9omgl0ByaIyB04Q1F7PViTSQ27Z8O8vnDlAmTO7dltZQiA+v+BmsPTzY10qsr3329k2LB5XLoUwbvvNrUmfsaj3A4KEcmiquHuLq+qkSLyBDAP5/zDeFXdKiJvAcGqOtv1WksR2QZEAc+r6umk/ROMz4i6AsuHw7pP4c6q0G4K3F7S21X5nW7dpjFlylbq1SvMN9+0p2zZO7xdkvFzopr4kL+I1ATGAXlUtYiIVAYGquqTqVFgXEFBQRocHOyNTZvEhO53WmUcWwNVHodGH0PGrN6uym/EbuL33XcbOH/+Co89VoMMGdLHUZS5dSKyTlWDkvNed44oRgDtcIaJUNWNItIkORszfmr3LPijL2g03P8LlO7s7Yr8yj//nGLgwNn07VuFgQOr0adPFW+XZNIZd3oKZ1DVA3Gei/JEMSaNiboCi4fBrI5w273Q628LiRQUERHFu+8up3Ll0WzbdpKcOe2GOeMd7hxRHHINP6nrbusnAZsKNb0L3ecaaloLVZ+Ehh9BxizerspvbNhwjH79ZrFhwzE6dy7HF1+04e67c3q7LJNOuRMUQ3CGn4oAx4EFrudMerVrBszr53zfflq6b7jnCceOXeDYsQtMm9aFBx8M9HY5Jp1zJygiVbWbxysxvi/qCix7Af7+L9xdA+6bBLeV8HZVfmPFioNs2nScxx6rQevWJdmzZyjZs9ucEcb73AmKta4b4SYD01X1vIdrMr4i7CyEu26SDzsDfw6G48FQ7Wlo+AEE2Jh5Sjh/PpzhwxcyatRaSpXKy4ABVcmSJaOFhPEZ7sxwd6+I1MW5Ye5NEdkATFLVSR6vznhPxCUYUwgiLl57Lstt0H4GlOrovbr8zLx5uxk8+FcOHQrlqadq8c47Ta2Jn/E5bv1Equr/gP+JyBvA58BPgAWFP4u45IREYE8o0ty567lIM8hlExumlEOHQmnX7mdKlszLihX97e5q47NuGhQikhOnPXg3IBCYBdT1cF3GVxSoDRX6ersKv6GqrF17hJo1C1K4cB5+/70n9esXsSZ+xqe5cx/FFqA28KGqllTVZ1X1Lw/XZYzfOXr0PJ06TaFWrW9YunQ/AM2bl7CQMD7PnZ/QEqoa7fFKjPFTqsqECRt45pn5hIVF8sEHzalXz+aKMGlHgkEhIp+o6rPANBG5oSGUGzPcmbRKFdZ+6Hyf/S7v1uIHunSZytSp22jQoAjffNOe0qXzebskY5IksSOKya7/JnVmO5OWqTr3SgR/7DT3s5YcyRIVFY2IkCGDcP/9pWnatBiPPBJkTfxMmpTgOQpVXeP6NlBVF8b+wjmpbfzNdSHxBDT9It3M8ZCStm8/SYMG3zJu3N8A9O5dmSFDrNOrSbvcOZndP57nBqR0IcbLVGHp87FCYoSFRBJFRETxzjvLqFLla3bsOE2ePNZm3fiHxM5RdMW5JLa4iEyP9VIu4Gz87zJp0tWQWPeJ0+CvyX8tJJJo/fqj9O07i02bjtO1a3lGjGjDnXfm8HZZxqSIxM5RrAFO48x1PSrW8+eB9Z4syqQiVVj6nDMrnYVEsh0/fpFTpy4xc2ZXOnQo6+1yjElRN53hztfYDHcpSBWWPgvrPoOqQ6HJ5xYSSbBs2QE2bz7O44/XBODy5QiyZbP+TMY33coMdwmeoxCRpa7/nhGRf2N9nRGRf5NbrPERsUOi2lMWEklw7lw4jz32G40aTWDE
iDWEh0cCWEgYv5XY0NPV6U5t5nZ/owpLnoG/P3dCovFnFhJumjt3F4888itHjpznmWdq89ZbTayJn/F7Cf6Ex7obuzBwRFWviEh9oBLwI3AuFeozKe26kHgaGn9qIeGmQ4dC6dBhEmXK5GPq1IeoVcsaJJr0wZ3LY2fiTIN6L/A9zj0UEz1alfEMVVgyzEIiCVSV1atDAChcOA/z5z/M338/YiFh0hV3giJaVSOAB4HPVfVJoKBnyzIpLiYk/gvVh1lIuOHIkfN07DiZOnXGxTTxa9KkOJkzB3i3MGNSmVtToYrIQ0Av4OqMNXbWLi1RhcVPw/oRTkg0+sRCIhGqyrhx63nuufmEh0fx8cctrImfSdfcCYr+wGM4bcb3ikhx4GfPlmVSzHUh8Qw0+thC4iY6d/6F6dO306hRUb75pj0lS+b1dknGeJVb91GISEagpOvhblWN9GhVibD7KJJAFRY/Beu/gOrPQqOPLCQSELuJ3w8/bOTSpQgGDapu/ZmM3/DIfRSxVt4A2A2MA8YDO0WkXnI2ZlKRhYTbtmw5Qb1642Oa+PXqVdk6vRoTiztDT58BbVV1G4CIBAI/AMlKJpMKVGHRUNgwEoKeg4YfWkjE48qVKN57bzn/+c9y8uTJyu23Z/N2Scb4JHeCIvPVkABQ1e0iktmDNZlboQqLnoQNoywkErFu3RH69p3Fli0n6NGjIp9/3or8+a2JnzHxcSco/haRr3GOIgB6Yk0BfdN1IfE8NPzAQiIBp09f5uzZMObM6U67dqW9XY4xPu2mJ7NFJCswFKgPCLAM+EJVwzxf3o3sZHYCVGHhE7DxSwuJBCxevI/Nm08wdGgtAMLCIsma1dpvmPThVk5mJ/pbIiIVgXuBGar6YXI2YFJB7JCo8QI0eN9CIpbQ0DBeeOFPxoz5m7Jl7+CRR6qTJUtGCwlj3JRY99iXcdp39AT+FJH4Zroz3qbRsPBxV0i8aCERx5w5OyhX7ku++WY9zz1Xh3XrBlsTP2OSKLHfmJ5AJVW9KCL5gbk4l8caX6HRriOJr1wh8Z6FRCyHDoXSqdMUypa9g5kzu1KjhnWeMSY5EguKcFW9CKCqJ0XEnb5QJrXEHEmMhpovQf13LSRw2m+sWhVC3bqFXU2mMWveAAAgAElEQVT8elG3bmHrz2TMLUjsw7+EiEx3fc0A7o31eHoi74shIq1FZIeI7BaRlxJZrrOIqIjYvRnusJCIV0jIOdq3n0S9euNjmvg1blzMQsKYW5TYEUWnOI9HJmXFIhKAM9d2CyAEWCsis2Pfk+FaLhfOVVV/JWX96ZZGw4LHYNPXUHM41P9Pug+J6Ghl7Nh1PP/8n0RGRvPppy2pX9+a+BmTUhKbuGjhLa67Jk5fqL0AIjIJ6ABsi7Pc28CHwHO3uD3/p9GwYAhsGmMhEUunTlOYOfMfmjYtztix91OixO3eLskYv+LJ8w4FgUOxHocQZx4LEakKFFbVXxNbkYgMFpFgEQk+efJkyleaFsQOiVovp/uQiIyMJjrauQeoU6dAxo69nwULellIGOMBngyK+D7FYu7uc50c/wx49mYrUtUxqhqkqkH58+dPwRLTCI2GPx91hcQrUO+ddB0SmzYdp06dcYwduw6Ahx+uxMCB1ZB0vE+M8SS3g0JEsiRx3SE4821fVQg4EutxLqACsERE9gO1gdl2QjuOqyGxeawrJN5OtyERHh7J668vpnr1MRw4cNZ6MxmTSm5655GI1MRpMZ4HKCIilYGBrilRE7MWKOWa6Ogw0A3ocfVFVQ0F7oi1nSXAc6pq/Tmu0mj48xHY/A3UfhXqvpVuQ2Lt2sP07TuLbdtO0qtXJT77rBX58mX3dlnGpAvu3KI6AmiHc5c2qrpRRJrc7E2qGikiTwDzgABgvKpuFZG3gGBVnX0Ldfs/C4nrnDkTxoULV5g7twdt2pTydjnGpCvuBEUGVT0QZ/w3yp2Vq+pcnDu6Yz/3fwks29iddaYLGg3zB8OWcVD7
Naj7ZroMiUWL9rF583Geeqo2LVvey86dT1j7DWO8wJ1zFIdcw08qIgEi8jSw08N1pV8WEpw9G8agQbNp1ux7vv56HeHhzsy7FhLGeIc7QTEEeAYoAhzHOek8xJNFpVsaDfMHuULi/9JlSMya9Q/lyo1i/PgNvPBCXWviZ4wPuOlvoKqewDkRbTwpJiTGQ53Xoe4b3q4o1R08GMpDD/1CYGB+Zs/uTlDQPd4uyRiDe1c9jSXW/Q9Xqepgj1SUHmk0zBsIW79NdyGhqqxYcZAGDYpSpEgeFizoTe3ahaw/kzE+xJ2hpwXAQtfXSuBOINyTRaUr6TgkDh4M5b77JtKw4YSYJn4NGxa1kDDGx7gz9DQ59mMR+QH402MVpSfRUTB/IGydkK5CIjpaGT06mBdfXICqMmJEa2viZ4wPS85ZwuJA0ZQuJN25LiTegLqve7uiVPPgg5OZNWsHLVqUYMyY+ylW7DZvl2SMSYQ75yjOcO0cRQbgXyDBuSWMG6KjYP4A2PpdugmJyMhoMmQQMmQQunYtT4cOZejbt4r1ZzImDUg0KMT5La6M04IDIFpVbzixbZIgdkjUfRPqxHv/oV/ZuPEY/fvPZtCgajz6aBDdu1f0dknGmCRI9GS2KxRmqGqU68tC4lZER8G8/q6QeMvvQyIsLJJXX11EUNBYQkLOcffdOb1dkjEmGdw5R7FGRKqp6t8er8afRUfBvH6w7QdXSLzm7Yo8as2aw/TpM5N//jlFnz6V+fTTVuTNm83bZRljkiHBoBCRjKoaCdQHBonIHuAizjwTqqrVUqnGtC92SNR722ny5+fOnQvn8uUI/vijJ61alfR2OcaYW5DYEcUaoBrQMZVq8U/pKCTmz9/D1q0nGDasDs2bl2DHDmviZ4w/SOy3WABUdU8q1eJ/oqPgj76w/UdnVrrar3i7Io84c+YyzzwznwkTNlC+fH4ee6wGWbJktJAwxk8k9pucX0SeSehFVf3UA/X4j3QSEtOnb+fxx+dy8uRFhg+vz//9XyMLCGP8TGK/0QFATuKf+9okJjoK/ugD23+C+v+BWi97uyKPOHgwlG7dplKhwp3MnduDqlULeLskY4wHJBYUR1X1rVSrxF9ER8HvveGfiVD/Xag13NsVpShVZdmyAzRqVIwiRfKwaFEfatUqSKZM1p/JGH+V2H0UdiSRVNGRfh0SBw6cpU2bn2jc+LuYJn716xexkDDGzyV2RNEs1arwB9GR8HsfV0i8B7X8p8tJdLTy5ZdreemlBQB88UUbGjSwdl/GpBcJBoWq/puahaRpMUcSP/tdSAB07DiJOXN20qrVvXz9dTuKFrUmfsakJ3Z5yq2KjoS5vWDHJGjwPtR80dsVpYiIiCgCAjKQIYPQvXsFOncuR69elayJnzHpkDsTF5mE+GlI/P33UWrW/IbRo4MB6N69Ir17V7aQMCadsqBIrutC4gO/CInLlyMYPnwBNWuO5dixCxQunNvbJRljfIANPSVHdCTMfRh2TIaGH0KN571d0S1bvTqEPn1msnPnafr3r8LHH7fk9tutiZ8xxoIi6fwwJAAuXrxCREQUf/7Zi+bNS3i7HGOMD7GgSIroSPitJ+ycAg0/ghrPebuiW/LHH7vZuvUEzz5bl2bNSvDPP0+QObPdE2GMuZ6do3CXH4XE6dOX6NNnJm3a/MR3323kypUoAAsJY0y8LCjcERUBv/VwQqLRx2k2JFSVqVO3Ua7cl0ycuJlXX23A2rWDLCCMMYmyoaebiYqAuT1h5y/Q6BMISrChrs87eDCUHj2mUanSXcyf/zCVK9/t7ZKMMWmABUVioiJgbg/YOTXNhoSqsnjxfpo2LU7RorexZElfatYsSMaMdjBpjHGPfVokJHZINP40TYbEvn1naNnyR5o1+z6miV/duoUtJIwxSWJHFPGJioDfusOuaU5IVB/m7YqSJCoqmpEj1/Dyy4sICBC++uo+a+JnjEk2C4q40nhIAHToMInffttF27alGD36
PgoXzuPtkowxaZgFRWxREfBbN9g1HRp/BtWf9nZFbovdxK9Xr0p0716BHj0qWn8mY8wt8+hgtYi0FpEdIrJbRG7ovS0iz4jINhHZJCILRcR74yOxQ6LJ52kqJIKDjxAUNJavvloLQNeuFejZ0zq9GmNShseCQkQCgFFAG6Ac0F1EysVZbD0QpKqVgKnAh56qJ1FREfBr12shUe0pr5SRVJcvR/Dii39Sq9Y3nDx50eaJMMZ4hCeHnmoCu1V1L4CITAI6ANuuLqCqi2Mtvxp42IP1xC/qCvzaDXbPgCb/hWpDU72E5Fi16hB9+sxk165/GTiwKh991JLbbsvq7bKMMX7Ik0FREDgU63EIUCuR5QcAv8f3gogMBgYDFClSJKXqS7MhAXD5ciTR0cqCBb1o1sya+BljPMeTQRHfALnGu6DIw0AQ0Ci+11V1DDAGICgoKN51JFnUFWe4afdMaDICqj2ZIqv1pLlzd7F16wmef74eTZsWZ/v2x8mUydpvGGM8y5Mns0OAwrEeFwKOxF1IRJoDrwDtVTXcg/VcE3UF5nRJMyFx6tQlHn54OvfdN5Gfftoc08TPQsIYkxo8GRRrgVIiUlxEMgPdgNmxFxCRqsDXOCFxwoO1XHM1JPbMgqZf+HRIqCqTJm0hMHAUU6Zs5fXXG7FmjTXxM8akLo8NPalqpIg8AcwDAoDxqrpVRN4CglV1NvARkBP4xXUp50FVbe+pmq4PiZFQ9XGPbSolHDwYSp8+M6lc+S7GjWtPxYp3ebskY0w6JKopM+SfWoKCgjQ4ODjpb4y6AnMegj2zfTokVJWFC/fFzDK3enUINWrcQ0CA9WcyxiSfiKxT1aDkvDd9fPqkkZDYs+dfmjX7nhYtfohp4le7diELCWOMV/l/C4+oKzC7M+ydA81GQZXHvF3RDaKiovnvf//i1VcXkSlTAF9/3c6a+BljfIZ/B0VkuHMksXcONPsSqgzxdkXxuv/+n/n99920a1ear766j0KFcnu7JGOMieG/QREZDnM6w95ffTIkrlyJImNGp4lf375V6NWrEt26VbD+TMYYn+Ofg9+xQ6L5Vz4XEmvWHKZ69TF8+aXTxK9Ll/J0726dXo0xvsn/giIyHOZ0uhYSlR/1dkUxLl2K4Nln51GnzjjOnLnMvffe7u2SjDHmpvxr6CkmJH6D5qOh8iPerijGihUH6dNnJnv3nuGRR6rzwQfNyZPHmvgZY3yf/wSFD4cEXJ1YSFi8uA+NGxfzdjnGGOM2/wiKyHCY/SDsmwstvoZKg71dEQBz5uxg+/ZTvPBCPZo0Kc62bY+TMaP/jfYZY/ybf3xqbf7GCYnmo30iJE6evEiPHtNo334SP/+8JaaJn4WEMSYt8o9Prh2TIV95rw83qSoTJ24mMHAUU6du4623GvPXXwOtiZ8xJk1L+0NPF47A4RVQ9w1vV8LBg6H06zeLqlXvZty49pQvf6e3SzLGmFuW9oNi5zRAofRDXtl8dLTy5597aNWqJEWL3sby5f2oXr2A9WcyxviNtP9ptvMXuKMC5AtM9U3v2nWapk2/o3Xrn1i27AAANWsWtJAwxviVtP2JdnXYKZWPJiIjo/noo5VUqjSaDRuOMW5cexo0SMG5vI0xxoek7aEnLw07tWs3kXnz9tChQxm+/PI+7rknV6pu36QNERERhISEEBYW5u1STDqSNWtWChUqRKZMmVJsnWk8KKak2rBTeHgkmTIFkCGDMHBgNfr3r8pDD5Wz/kwmQSEhIeTKlYtixYrZz4lJFarK6dOnCQkJoXjx4im23rQ79HT+MBxeCaW7eHxTq1eHUK3aGEaNWgNA587l6NKlvP3ym0SFhYWRL18++zkxqUZEyJcvX4ofxabdoNj+E54edrp48QrDhv1B3brjOH8+nFKl8nlsW8Y/WUiY1OaJn7m0OfQUug9WvwXFWkG+sh7ZxPLlB+jTZyb79p3lsceCeO+95uTOncUj2zLGGF+WNo8o5g0AyQAtxnhs
E5GR0WTKFMDSpX0ZNeo+CwmTJgUEBFClShUqVKjA/fffz9mzZ2Ne27p1K02bNqV06dKUKlWKt99+G1WNef33338nKCiIwMBAypYty3PPPeeNf0Ki1q9fz8CBA71dRqLee+89SpYsSZkyZZg3b168y6gqr7zyCqVLlyYwMJARI0YA8NFHH1GlSpWY/4cBAQH8+++/XLlyhYYNGxIZGZk6/whVTVNf1QOLqH6M6saxmtJmzNiu7767LOZxRERUim/DpB/btm3zdgmaI0eOmO979+6t77zzjqqqXrp0SUuUKKHz5s1TVdWLFy9q69atdeTIkaqqunnzZi1RooRu375dVVUjIiJ01KhRKVpbRETELa+jc+fOumHDhlTdZlJs3bpVK1WqpGFhYbp3714tUaKERkZG3rDc+PHjtVevXhoV5XzmHD9+/IZlZs+erU2aNIl5/MYbb+iPP/4Y73bj+9kDgjWZn7tpb+jpQogz5FRxQIqt8vjxCzz55O/88ss2qlUrwLPP1iVz5gBr4mdSzuKn4cSGlF3nnVWgyeduL16nTh02bdoEwMSJE6lXrx4tW7YEIHv27IwcOZLGjRvz+OOP8+GHH/LKK69QtqwztJsxY0Yee+yxG9Z54cIFnnzySYKDgxERXn/9dTp16kTOnDm5cOECAFOnTuXXX39lwoQJ9O3bl7x587J+/XqqVKnCjBkz2LBhA7fddhsAJUuWZOXKlWTIkIFHH32UgwcPAvD5559Tr16967Z9/vx5Nm3aROXKlQFYs2YNTz/9NJcvXyZbtmx8++23lClThgkTJvDbb78RFhbGxYsXWbRoER999BFTpkwhPDycBx54gDfffBOAjh07cujQIcLCwnjqqacYPPjWmozOmjWLbt26kSVLFooXL07JkiVZs2YNderUuW65r776iokTJ5Ihg/OZc+edN7b/+fnnn+nevXvM444dOzJ8+HB69ux5SzW6I+0FBUCLsZACJ2xUlR9/3MTTT8/jwoUr/Oc/TXn++bpkymRN/Ix/iYqKYuHChQwY4PyBtXXrVqpXr37dMvfeey8XLlzg3LlzbNmyhWefffam63377bfJkycPmzdvBuDMmTM3fc/OnTtZsGABAQEBREdHM2PGDPr168dff/1FsWLFuOuuu+jRowfDhg2jfv36HDx4kFatWrF9+/br1hMcHEyFChViHpctW5Zly5aRMWNGFixYwMsvv8y0adMAWLVqFZs2bSJv3rzMnz+fXbt2sWbNGlSV9u3bs2zZMho2bMj48ePJmzcvly9fpkaNGnTq1Il8+a6/iGXYsGEsXrz4hn9Xt27deOmll6577vDhw9SuXTvmcaFChTh8+PAN792zZw+TJ09mxowZ5M+fnxEjRlCqVKmY1y9dusQff/zByJEjY56rUKECa9euven+TglpLygCMkPuwimyqoMHQxk4cA5BQfcwblx7ypa9I0XWa8wNkvCXf0q6fPkyVapUYf/+/VSvXp0WLVoAzh9JCV0dk5SrZhYsWMCkSZNiHt9++82n933ooYcICHD+GOvatStvvfUW/fr1Y9KkSXTt2jVmvdu2bYt5z7lz5zh//jy5cl27ufXo0aPkz58/5nFoaCh9+vRh165diAgRERExr7Vo0YK8efMCMH/+fObPn0/VqlUB56ho165dNGzYkBEjRjBjxgwADh06xK5du24Iis8++8y9nQPXnfO5Kr79Gx4eTtasWQkODmb69On079+f5cuXx7w+Z84c6tWrF/NvAOf8U+bMmW/YL56Q9oLiFkVHK/Pm7aZNm1IULXobK1f2p2rVu60/k/FL2bJlY8OGDYSGhtKuXTtGjRrF0KFDKV++PMuWLbtu2b1795IzZ05y5cpF+fLlWbduXcywTkISCpzYz8W9pj9Hjhwx39epU4fdu3dz8uRJZs6cyauvvgpAdHQ0q1atIlu2bIn+22Kv+7XXXqNJkybMmDGD/fv307hx43i3qaoMHz6cRx65flqCJUuWsGDB
AlatWkX27Nlp3LhxvPcjJOWIolChQhw6dCjmcUhICPfcc88N7y1UqBCdOnUC4IEHHqBfv37XvT5p0qTrhp2uuhownpauPh137jxN48YTaNt2IkuX7gcgKOgeCwnj9/LkycOIESP4+OOPiYiIoGfPnqxYsYIFCxYAzpHH0KFDeeGFFwB4/vnneffdd9m5cyfgfHB/+umnN6y3ZcuW1w2HXB16uuuuu9i+fXvM0FJCRIQHHniAZ555hsDAwJi/3uOud8OGG8/vBAYGsnv37pjHoaGhFCxYEIAJEyYkuM1WrVoxfvz4mHMohw8f5sSJE4SGhnL77beTPXt2/vnnH1avXh3v+z/77DM2bNhww1fckABo3749kyZNIjw8nH379rFr1y5q1qx5w3IdO3Zk0aJFACxdupTSpUtf9+9aunQpHTp0uO49p0+fJn/+/CnaqiMh6eITMjIymg8+WEGlSl+xefMJvv22Aw0bFvV2WcakqqpVq1K5cmUmTZpEtmzZmDVrFu+88w5lypShYsWK1KhRgyeeeAKASpUq8fnnn9O9e3cCAwOpUKECR48evWGdr776KmfOnKFChQpUrlw55i/t999/n3bt2tG0aVMKFCiQaF1du3blxx9/jBl2AhgxYgTBwcFUqlSJcuXKMXr06BveV7ZsWUJDQzl//jwAL7zwAsOHD6devXpERUUluL2WLVvSo0cP6tSpQ8WKFencuTPnz5+ndevWREZGUqlSJV577bXrzi0kV/ny5enSpQvlypWjdevWjBo1KmbYrW3bthw5cgSAl156iWnTplGxYkWGDx/ON998E7OOGTNm0LJly+uOigAWL15M27Ztb7lGd0h8Y2i+LKh4Ng3edzlJ72nV6kfmz9/Dgw8GMmpUW+6+O6eHqjPmmu3btxMYmPrt79OTzz77jFy5cvn8vRSe8OCDD/Lee+9RpkyZG16L72dPRNapalBytuW3RxRhYZFERUUDMHhwNaZOfYhp07pYSBjjR4YMGUKWLOnvZtgrV67QsWPHeEPCE/wyKFauPEiVKqMZNcq5dKxTp3J06lTOy1UZY1Ja1qxZ6dWrl7fLSHWZM2emd+/eqbY9vwqKCxeuMHTo7zRo8C1hYZEEBtrlrsa70trQrkn7PPEz5zeXxy5dup8+fWZy8GAoTzxRk3ffbUbOnJm9XZZJx7Jmzcrp06et1bhJNeqajyKlL5lNe0ERkPAOyJ49E8uX96NePZuW1HhfoUKFCAkJ4eTJk94uxaQjV2e4S0lp76qnoCANDg4GYPr07fzzzylefrkBAFFR0XZPhDHGxMNnr3oSkdYiskNEdovIDXejiEgWEZnsev0vESnmznqPHbtA585T6NRpCjNm/MOVK8410xYSxhiT8jw29CQiAcAooAUQAqwVkdmqui3WYgOAM6paUkS6AR8AXW9c2zWnT18iMHAUly9H8N57zXj22TrWxM8YYzzIk3+C1wR2q+peVb0CTAI6xFmmA/Cd6/upQDO5yVm/AwdCqVDhTjZufJSXXqpvIWGMMR7myZPZBYFDsR6HALUSWkZVI0UkFMgHnIq9kIgMBq42hg9fsaL/lrKemQE1rbmDOPsqHbN9cY3ti2tsX1yT7LvzPBkU8R0ZxD1z7s4yqOoYYAyAiAQn94SMv7F9cY3ti2tsX1xj++IaEQlO7ns9OfQUAsSeOKIQcCShZUQkI5AH+NeDNRljjEkiTwbFWqCUiBQXkcxAN2B2nGVmA31c33cGFmlau17XGGP8nMeGnlznHJ4A5gEBwHhV3Soib+FM8j0bGAf8ICK7cY4kurmx6jGeqjkNsn1xje2La2xfXGP74ppk74s0d8OdMcaY1GV3qBljjEmUBYUxxphE+WxQeKr9R1rkxr54RkS2icgmEVkoIn47z+vN9kWs5TqLiIqI314a6c6+EJEurp+NrSIyMbVrTC1u/I4UEZHFIrLe9XuSOnOIpjIRGS8iJ0RkSwKvi4iMcO2nTSJSza0Vq6rPfeGc
/N4DlAAyAxuBcnGWeQwY7fq+GzDZ23V7cV80AbK7vh+SnveFa7lcwDJgNRDk7bq9+HNRClgP3O56fKe36/bivhgDDHF9Xw7Y7+26PbQvGgLVgC0JvN4W+B3nHrbawF/urNdXjyg80v4jjbrpvlDVxap6yfVwNc49K/7InZ8LgLeBD4Gw1CwulbmzLwYBo1T1DICqnkjlGlOLO/tCgdyu7/Nw4z1dfkFVl5H4vWgdgO/VsRq4TUQK3Gy9vhoU8bX/KJjQMqoaCVxt/+Fv3NkXsQ3A+YvBH910X4hIVaCwqv6amoV5gTs/F6WB0iKyUkRWi0jrVKsudbmzL94AHhaREGAu8GTqlOZzkvp5AvjuxEUp1v7DD7j97xSRh4EgoJFHK/KeRPeFiGQAPgP6plZBXuTOz0VGnOGnxjhHmctFpIKqnvVwbanNnX3RHZigqp+ISB2c+7cqqGq058vzKcn63PTVIwpr/3GNO/sCEWkOvAK0V9XwVKottd1sX+QCKgBLRGQ/zhjsbD89oe3u78gsVY1Q1X3ADpzg8Dfu7IsBwBQAVV0FZMVpGJjeuPV5EpevBoW1/7jmpvvCNdzyNU5I+Os4NNxkX6hqqKreoarFVLUYzvma9qqa7GZoPsyd35GZOBc6ICJ34AxF7U3VKlOHO/viINAMQEQCcYIiPc5ROxvo7br6qTYQqqpHb/Ymnxx6Us+1/0hz3NwXHwE5gV9c5/MPqmp7rxXtIW7ui3TBzX0xD2gpItuAKOB5VT3tvao9w8198SwwVkSG4Qy19PXHPyxF5GecocY7XOdjXgcyAajqaJzzM22B3cAloJ9b6/XDfWWMMSYF+erQkzHGGB9hQWGMMSZRFhTGGGMSZUFhjDEmURYUxhhjEmVBYXyOiESJyIZYX8USWbZYQp0yk7jNJa7uoxtdLS/KJGMdj4pIb9f3fUXknlivfSMi5VK4zrUiUsWN9zwtItlvddsm/bKgML7osqpWifW1P5W221NVK+M0m/woqW9W1dGq+r3rYV/gnlivDVTVbSlS5bU6v8S9Op8GLChMsllQmDTBdeSwXET+dn3VjWeZ8iKyxnUUsklESrmefzjW81+LSMBNNrcMKOl6bzPXHAabXb3+s7ief1+uzQHyseu5N0TkORHpjNNz6yfXNrO5jgSCRGSIiHwYq+a+IvJFMutcRayGbiLylYgEizP3xJuu54biBNZiEVnseq6liKxy7cdfRCTnTbZj0jkLCuOLssUadprheu4E0EJVqwFdgRHxvO9R4L+qWgXngzrE1a6hK1DP9XwU0PMm278f2CwiWYEJQFdVrYjTyWCIiOQFHgDKq2ol4J3Yb1bVqUAwzl/+VVT1cqyXpwIPxnrcFZiczDpb47TpuOoVVQ0CKgGNRKSSqo7A6eXTRFWbuFp5vAo0d+3LYOCZm2zHpHM+2cLDpHuXXR+WsWUCRrrG5KNw+hbFtQp4RUQKAdNVdZeINAOqA2td7U2y4YROfH4SkcvAfpw21GWAfaq60/X6d8DjwEicuS6+EZHfALdbmqvqSRHZ6+qzs8u1jZWu9Salzhw47Spiz1DWRUQG4/xeF8CZoGdTnPfWdj2/0rWdzDj7zZgEWVCYtGIYcByojHMkfMOkRKo6UUT+Au4D5onIQJy2yt+p6nA3ttEzdgNBEYl3fhNXb6GaOE3mugFPAE2T8G+ZDHQB/gFmqKqK86ntdp04s7i9D4wCHhSR4sBzQA1VPSMiE3Aa38UlwJ+q2j0J9Zp0zoaeTFqRBzjqmj+gF85f09cRkRLAXtdwy2ycIZiFQGcRudO1TF5xf07xf4BiIlLS9bgXsNQ1pp9HVefinCiO78qj8zhtz+MzHeiIM0fCZNdzSapTVSNwhpBqu4atcgMXgVARuQtok0Atq4F6V/9NIpJdROI7OjMmhgWFSSu+BPqIyGqcYaeL8SzTFdgiIhuAsjhTPm7D+UCdLyKbgD9xhmVuSlXDcLpr/iIim4FoYDTOh+6vrvUt
xTnaiWsCMPrqyew46z0DbAOKquoa13NJrtN17uMT4DlV3YgzP/ZWYDzOcNZVY4DfRWSxqp7EuSLrZ52RV8cAAABESURBVNd2VuPsK2MSZN1jjTHGJMqOKIwxxiTKgsIYY0yiLCiMMcYkyoLCGGNMoiwojDHGJMqCwhhjTKIsKIwxxiTq/wHVsuZJjW3Z8QAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot(fpr,tpr,color='darkorange', label=\"ROC curve (area = %0.2f)\"%roc_auc)\n",
    "plt.plot([0,1],[0,1],color='navy',linestyle='--')\n",
    "plt.xlim([0.0,1.0])\n",
    "plt.ylim([0.0,1.0])\n",
    "plt.xlabel('False Positive Rate')\n",
    "plt.ylabel('True Positive Rate')\n",
    "plt.title('ROC_curve')\n",
    "plt.legend(loc=\"lower right\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 建立评分卡"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "coe = clf.coef_\n",
    "factor = clf.intercept_\n",
    "A = 50\n",
    "B = 7.5/np.log(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([36.48420081])"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "base_score = A+B*factor\n",
    "base_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'AGE': 0.7875578027606502,\n",
       " 'CURR_FREEZE_VALUE': 0.9278274424179149,\n",
       " 'EDU_EXPERIENCE_70': 0.19919899821228426,\n",
       " 'OCCUPATION_TYPE_4': 0.7652321274342909,\n",
       " 'OCCUPATION_1': 0.8946336976580399,\n",
       " 'OCCUPATION_TYPE_z': -0.030663696286590155,\n",
       " 'OCCUPATION_TYPE_1': 0.7132596200304603,\n",
       " 'EDU_EXPERIENCE_20': 0.8260559849100345,\n",
       " 'EDU_EXPERIENCE_99': 0.5786869761189828,\n",
       " 'EDU_EXPERIENCE_10': 0.41719609291228765,\n",
       " 'OCCUPATION_TYPE_3': 0.6673961736302454,\n",
       " 'OCCUPATION_TYPE_5': 0.6185693606637712}"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "coe_dct = dict(zip(train_X_cols,coe.tolist()[0]))\n",
    "coe_dct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\annocada\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>box</th>\n",
       "      <th>count</th>\n",
       "      <th>hit</th>\n",
       "      <th>all</th>\n",
       "      <th>expected_cnt</th>\n",
       "      <th>chi_sequare</th>\n",
       "      <th>woe</th>\n",
       "      <th>iv</th>\n",
       "      <th>t</th>\n",
       "      <th>iv_mount</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>21.0~22.0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.488095</td>\n",
       "      <td>0.536876</td>\n",
       "      <td>1.130615</td>\n",
       "      <td>0.006224</td>\n",
       "      <td>AGE</td>\n",
       "      <td>0.064946</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>23.0~27.0</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>1.952381</td>\n",
       "      <td>0.464576</td>\n",
       "      <td>-0.815295</td>\n",
       "      <td>0.008351</td>\n",
       "      <td>AGE</td>\n",
       "      <td>0.064946</td>\n",
       "      <td>-6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>28.0~57.0</td>\n",
       "      <td>467</td>\n",
       "      <td>110</td>\n",
       "      <td>467</td>\n",
       "      <td>113.970238</td>\n",
       "      <td>0.138306</td>\n",
       "      <td>-0.046640</td>\n",
       "      <td>0.001991</td>\n",
       "      <td>AGE</td>\n",
       "      <td>0.064946</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>58</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>9</td>\n",
       "      <td>2.196429</td>\n",
       "      <td>3.578542</td>\n",
       "      <td>1.353759</td>\n",
       "      <td>0.040818</td>\n",
       "      <td>AGE</td>\n",
       "      <td>0.064946</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>59.0~75.0</td>\n",
       "      <td>18</td>\n",
       "      <td>6</td>\n",
       "      <td>18</td>\n",
       "      <td>4.392857</td>\n",
       "      <td>0.587979</td>\n",
       "      <td>0.437468</td>\n",
       "      <td>0.007561</td>\n",
       "      <td>AGE</td>\n",
       "      <td>0.064946</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0~10000.0</td>\n",
       "      <td>386</td>\n",
       "      <td>105</td>\n",
       "      <td>386</td>\n",
       "      <td>94.202381</td>\n",
       "      <td>1.237639</td>\n",
       "      <td>0.146221</td>\n",
       "      <td>0.016980</td>\n",
       "      <td>CURR_FREEZE_VALUE</td>\n",
       "      <td>0.248174</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15000.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.244048</td>\n",
       "      <td>2.341609</td>\n",
       "      <td>5.042638</td>\n",
       "      <td>0.040732</td>\n",
       "      <td>CURR_FREEZE_VALUE</td>\n",
       "      <td>0.248174</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20000.0</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2.196429</td>\n",
       "      <td>2.196429</td>\n",
       "      <td>-5.671780</td>\n",
       "      <td>0.133518</td>\n",
       "      <td>CURR_FREEZE_VALUE</td>\n",
       "      <td>0.248174</td>\n",
       "      <td>-56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>30000.0~98000.0</td>\n",
       "      <td>72</td>\n",
       "      <td>12</td>\n",
       "      <td>72</td>\n",
       "      <td>17.571429</td>\n",
       "      <td>1.766551</td>\n",
       "      <td>-0.478823</td>\n",
       "      <td>0.028691</td>\n",
       "      <td>CURR_FREEZE_VALUE</td>\n",
       "      <td>0.248174</td>\n",
       "      <td>-4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>100000.0~500000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>5</td>\n",
       "      <td>36</td>\n",
       "      <td>8.785714</td>\n",
       "      <td>1.631243</td>\n",
       "      <td>-0.693934</td>\n",
       "      <td>0.028253</td>\n",
       "      <td>CURR_FREEZE_VALUE</td>\n",
       "      <td>0.248174</td>\n",
       "      <td>-6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>502</td>\n",
       "      <td>123</td>\n",
       "      <td>502</td>\n",
       "      <td>122.511905</td>\n",
       "      <td>0.001945</td>\n",
       "      <td>0.005263</td>\n",
       "      <td>0.000028</td>\n",
       "      <td>EDU_EXPERIENCE_10</td>\n",
       "      <td>0.021566</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0.488095</td>\n",
       "      <td>0.488095</td>\n",
       "      <td>-4.167702</td>\n",
       "      <td>0.021539</td>\n",
       "      <td>EDU_EXPERIENCE_10</td>\n",
       "      <td>0.021566</td>\n",
       "      <td>-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>476</td>\n",
       "      <td>120</td>\n",
       "      <td>476</td>\n",
       "      <td>116.166667</td>\n",
       "      <td>0.126495</td>\n",
       "      <td>0.043176</td>\n",
       "      <td>0.001780</td>\n",
       "      <td>EDU_EXPERIENCE_20</td>\n",
       "      <td>0.042580</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>28</td>\n",
       "      <td>3</td>\n",
       "      <td>28</td>\n",
       "      <td>6.833333</td>\n",
       "      <td>2.150407</td>\n",
       "      <td>-0.989649</td>\n",
       "      <td>0.040800</td>\n",
       "      <td>EDU_EXPERIENCE_20</td>\n",
       "      <td>0.042580</td>\n",
       "      <td>-8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>347</td>\n",
       "      <td>75</td>\n",
       "      <td>347</td>\n",
       "      <td>84.684524</td>\n",
       "      <td>1.107522</td>\n",
       "      <td>-0.157699</td>\n",
       "      <td>0.016425</td>\n",
       "      <td>EDU_EXPERIENCE_70</td>\n",
       "      <td>0.048762</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>157</td>\n",
       "      <td>48</td>\n",
       "      <td>157</td>\n",
       "      <td>38.315476</td>\n",
       "      <td>2.447836</td>\n",
       "      <td>0.310468</td>\n",
       "      <td>0.032337</td>\n",
       "      <td>EDU_EXPERIENCE_70</td>\n",
       "      <td>0.048762</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>409</td>\n",
       "      <td>109</td>\n",
       "      <td>409</td>\n",
       "      <td>99.815476</td>\n",
       "      <td>0.845114</td>\n",
       "      <td>0.118180</td>\n",
       "      <td>0.011674</td>\n",
       "      <td>EDU_EXPERIENCE_99</td>\n",
       "      <td>0.073387</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>95</td>\n",
       "      <td>14</td>\n",
       "      <td>95</td>\n",
       "      <td>23.184524</td>\n",
       "      <td>3.638439</td>\n",
       "      <td>-0.624777</td>\n",
       "      <td>0.061714</td>\n",
       "      <td>EDU_EXPERIENCE_99</td>\n",
       "      <td>0.073387</td>\n",
       "      <td>-3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>497</td>\n",
       "      <td>119</td>\n",
       "      <td>497</td>\n",
       "      <td>121.291667</td>\n",
       "      <td>0.043298</td>\n",
       "      <td>-0.025156</td>\n",
       "      <td>0.000620</td>\n",
       "      <td>OCCUPATION_1</td>\n",
       "      <td>0.035576</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>1.708333</td>\n",
       "      <td>3.074187</td>\n",
       "      <td>1.418297</td>\n",
       "      <td>0.034956</td>\n",
       "      <td>OCCUPATION_1</td>\n",
       "      <td>0.035576</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>488</td>\n",
       "      <td>115</td>\n",
       "      <td>488</td>\n",
       "      <td>119.095238</td>\n",
       "      <td>0.140820</td>\n",
       "      <td>-0.046031</td>\n",
       "      <td>0.002027</td>\n",
       "      <td>OCCUPATION_TYPE_1</td>\n",
       "      <td>0.051823</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>8</td>\n",
       "      <td>16</td>\n",
       "      <td>3.904762</td>\n",
       "      <td>4.295006</td>\n",
       "      <td>1.130615</td>\n",
       "      <td>0.049796</td>\n",
       "      <td>OCCUPATION_TYPE_1</td>\n",
       "      <td>0.051823</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>453</td>\n",
       "      <td>117</td>\n",
       "      <td>453</td>\n",
       "      <td>110.553571</td>\n",
       "      <td>0.375894</td>\n",
       "      <td>0.075678</td>\n",
       "      <td>0.005247</td>\n",
       "      <td>OCCUPATION_TYPE_3</td>\n",
       "      <td>0.066554</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>51</td>\n",
       "      <td>6</td>\n",
       "      <td>51</td>\n",
       "      <td>12.446429</td>\n",
       "      <td>3.338825</td>\n",
       "      <td>-0.884288</td>\n",
       "      <td>0.061307</td>\n",
       "      <td>OCCUPATION_TYPE_3</td>\n",
       "      <td>0.066554</td>\n",
       "      <td>-6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>452</td>\n",
       "      <td>116</td>\n",
       "      <td>452</td>\n",
       "      <td>110.309524</td>\n",
       "      <td>0.293551</td>\n",
       "      <td>0.067094</td>\n",
       "      <td>0.004106</td>\n",
       "      <td>OCCUPATION_TYPE_4</td>\n",
       "      <td>0.048790</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>52</td>\n",
       "      <td>7</td>\n",
       "      <td>52</td>\n",
       "      <td>12.690476</td>\n",
       "      <td>2.551639</td>\n",
       "      <td>-0.730137</td>\n",
       "      <td>0.044684</td>\n",
       "      <td>OCCUPATION_TYPE_4</td>\n",
       "      <td>0.048790</td>\n",
       "      <td>-6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>275</td>\n",
       "      <td>53</td>\n",
       "      <td>275</td>\n",
       "      <td>67.113095</td>\n",
       "      <td>2.967818</td>\n",
       "      <td>-0.301770</td>\n",
       "      <td>0.045804</td>\n",
       "      <td>OCCUPATION_TYPE_5</td>\n",
       "      <td>0.092888</td>\n",
       "      <td>-2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>229</td>\n",
       "      <td>70</td>\n",
       "      <td>229</td>\n",
       "      <td>55.886905</td>\n",
       "      <td>3.563974</td>\n",
       "      <td>0.310206</td>\n",
       "      <td>0.047084</td>\n",
       "      <td>OCCUPATION_TYPE_5</td>\n",
       "      <td>0.092888</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>426</td>\n",
       "      <td>110</td>\n",
       "      <td>426</td>\n",
       "      <td>103.964286</td>\n",
       "      <td>0.350407</td>\n",
       "      <td>0.075353</td>\n",
       "      <td>0.004891</td>\n",
       "      <td>OCCUPATION_TYPE_z</td>\n",
       "      <td>0.035973</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>78</td>\n",
       "      <td>13</td>\n",
       "      <td>78</td>\n",
       "      <td>19.035714</td>\n",
       "      <td>1.913763</td>\n",
       "      <td>-0.478823</td>\n",
       "      <td>0.031082</td>\n",
       "      <td>OCCUPATION_TYPE_z</td>\n",
       "      <td>0.035973</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 box  count  hit  all  expected_cnt  chi_sequare       woe  \\\n",
       "0          21.0~22.0      2    1    2      0.488095     0.536876  1.130615   \n",
       "1          23.0~27.0      8    1    8      1.952381     0.464576 -0.815295   \n",
       "2          28.0~57.0    467  110  467    113.970238     0.138306 -0.046640   \n",
       "3                 58      9    5    9      2.196429     3.578542  1.353759   \n",
       "4          59.0~75.0     18    6   18      4.392857     0.587979  0.437468   \n",
       "0        0.0~10000.0    386  105  386     94.202381     1.237639  0.146221   \n",
       "1            15000.0      1    1    1      0.244048     2.341609  5.042638   \n",
       "2            20000.0      9    0    9      2.196429     2.196429 -5.671780   \n",
       "3    30000.0~98000.0     72   12   72     17.571429     1.766551 -0.478823   \n",
       "4  100000.0~500000.0     36    5   36      8.785714     1.631243 -0.693934   \n",
       "0                  0    502  123  502    122.511905     0.001945  0.005263   \n",
       "1                  1      2    0    2      0.488095     0.488095 -4.167702   \n",
       "0                  0    476  120  476    116.166667     0.126495  0.043176   \n",
       "1                  1     28    3   28      6.833333     2.150407 -0.989649   \n",
       "0                  0    347   75  347     84.684524     1.107522 -0.157699   \n",
       "1                  1    157   48  157     38.315476     2.447836  0.310468   \n",
       "0                  0    409  109  409     99.815476     0.845114  0.118180   \n",
       "1                  1     95   14   95     23.184524     3.638439 -0.624777   \n",
       "0                  0    497  119  497    121.291667     0.043298 -0.025156   \n",
       "1                  1      7    4    7      1.708333     3.074187  1.418297   \n",
       "0                  0    488  115  488    119.095238     0.140820 -0.046031   \n",
       "1                  1     16    8   16      3.904762     4.295006  1.130615   \n",
       "0                  0    453  117  453    110.553571     0.375894  0.075678   \n",
       "1                  1     51    6   51     12.446429     3.338825 -0.884288   \n",
       "0                  0    452  116  452    110.309524     0.293551  0.067094   \n",
       "1                  1     52    7   52     12.690476     2.551639 -0.730137   \n",
       "0                  0    275   53  275     67.113095     2.967818 -0.301770   \n",
       "1                  1    229   70  229     55.886905     3.563974  0.310206   \n",
       "0                  0    426  110  426    103.964286     0.350407  0.075353   \n",
       "1                  1     78   13   78     19.035714     1.913763 -0.478823   \n",
       "\n",
       "         iv                  t  iv_mount  score  \n",
       "0  0.006224                AGE  0.064946      9  \n",
       "1  0.008351                AGE  0.064946     -6  \n",
       "2  0.001991                AGE  0.064946      0  \n",
       "3  0.040818                AGE  0.064946     11  \n",
       "4  0.007561                AGE  0.064946      3  \n",
       "0  0.016980  CURR_FREEZE_VALUE  0.248174      1  \n",
       "1  0.040732  CURR_FREEZE_VALUE  0.248174     50  \n",
       "2  0.133518  CURR_FREEZE_VALUE  0.248174    -56  \n",
       "3  0.028691  CURR_FREEZE_VALUE  0.248174     -4  \n",
       "4  0.028253  CURR_FREEZE_VALUE  0.248174     -6  \n",
       "0  0.000028  EDU_EXPERIENCE_10  0.021566      0  \n",
       "1  0.021539  EDU_EXPERIENCE_10  0.021566    -18  \n",
       "0  0.001780  EDU_EXPERIENCE_20  0.042580      0  \n",
       "1  0.040800  EDU_EXPERIENCE_20  0.042580     -8  \n",
       "0  0.016425  EDU_EXPERIENCE_70  0.048762      0  \n",
       "1  0.032337  EDU_EXPERIENCE_70  0.048762      0  \n",
       "0  0.011674  EDU_EXPERIENCE_99  0.073387      0  \n",
       "1  0.061714  EDU_EXPERIENCE_99  0.073387     -3  \n",
       "0  0.000620       OCCUPATION_1  0.035576      0  \n",
       "1  0.034956       OCCUPATION_1  0.035576     13  \n",
       "0  0.002027  OCCUPATION_TYPE_1  0.051823      0  \n",
       "1  0.049796  OCCUPATION_TYPE_1  0.051823      8  \n",
       "0  0.005247  OCCUPATION_TYPE_3  0.066554      0  \n",
       "1  0.061307  OCCUPATION_TYPE_3  0.066554     -6  \n",
       "0  0.004106  OCCUPATION_TYPE_4  0.048790      0  \n",
       "1  0.044684  OCCUPATION_TYPE_4  0.048790     -6  \n",
       "0  0.045804  OCCUPATION_TYPE_5  0.092888     -2  \n",
       "1  0.047084  OCCUPATION_TYPE_5  0.092888      2  \n",
       "0  0.004891  OCCUPATION_TYPE_z  0.035973      0  \n",
       "1  0.031082  OCCUPATION_TYPE_z  0.035973      0  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "split_box_seleced['score']=split_box_seleced.apply(lambda row:int(row['woe']*coe_dct.get(row['t'])*B),axis=1)\n",
    "split_box_seleced"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-105"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(split_box_seleced.groupby(['t'])['score'].min())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "84"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(split_box_seleced.groupby(['t'])['score'].max())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "36.48420081426545"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "base_score[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
